In this notebook, the effect of Dox and 9-TB on the gut microbiome is assessed using data from fecal whole metagenome sequencing.
The analysis focuses on the bacterial community using counts of species derived using the Braken software.
The analysis is done separately at the following 3 time points: - Day -4: 4 days prior IFV infection and 1 day prior Dox/9-TB treatment. - Day 0: The day of IFV infection (samples were collected prior to the infection) and 3 days after the start of the Dox/9-TB treatment. - Day 3: 3 days post IFV infection and 6 days after the start of the Dox/9-TB treatment.
Differences in bacterial species composition are assessed using perMANOVA and visualized using NMDS.
Differences in bacterial species diversity are assessed using the Shannon diversity index (SDI) and richness.
# Load required packages
suppressMessages(library(here))
suppressMessages(library(tidyverse))
suppressMessages(library(readxl))
suppressMessages(library(plotly))
suppressMessages(library(ggpubr))
# R package vegan needed yet not loaded
load(here("data/setup.RData"))
set.seed(1988)
taxa.count.table <- list.files(here("data/metagenome"), pattern = "\\.report_table\\.xlsx") %>%
lapply(function(x){
readxl::read_xlsx(here("data/metagenome", x)) %>%
mutate(SampleID = str_remove(case, "^[A-Z0-9]*-"),
across(where(is.character), as.factor))
}) %>%
bind_rows %>%
# Resolve identical taxa names using an arbitrary index
group_by(SampleID, name) %>%
mutate(taxa = paste0(name, "|", 1:n())) %>%
ungroup
saveRDS(taxa.count.table, here("data/metagenome/taxa_count_table.rds"))
samples.metadata <- read.table(here("data/metagenome/IFV_EPFL_20210817_run_samples.txt"),
sep = "\t",
header = 1,
stringsAsFactors = T) %>%
mutate(SampleID = as.character(SampleID),
TimePoint = as.factor(paste0("Day ", SamplingTimeRelativeToInfection)),
Cage = paste0(TreatmentID,
" - ", InfectionStatus,
" - cage", CageID) %>%
as.factor,
TreatmentLabel = factor(TreatmentLabel,
levels = c("veh",
"Dox 40mpkd",
"9TB 1mpkd"))) %>%
as.data.frame %>%
`row.names<-`(.$SampleID)
saveRDS(samples.metadata, here("data/metagenome/samples_metadata.rds"))
# Split taxa count data into separate matrices for each time point
taxa.count.matrix.per.timepoint.list <- lapply(as.character(levels(samples.metadata$TimePoint)), function(x){
d <- samples.metadata %>%
filter(TimePoint == x) %>%
as.data.frame %>%
`row.names<-`(.$SampleID)
taxa.count.table %>%
filter(domain == "Bacteria") %>%
inner_join(d,
by = "SampleID") %>%
select(SampleID, taxa, rpm) %>%
pivot_wider(id_cols = SampleID, names_from = taxa, values_from = rpm) %>%
mutate(across(where(is.numeric), ~ifelse(is.na(.x), 0, .x))) %>%
as.data.frame %>%
`row.names<-`(.$SampleID) %>%
select(-SampleID) %>%
as.matrix
}) %>%
`names<-`(levels(samples.metadata$TimePoint))
saveRDS(taxa.count.matrix.per.timepoint.list,
here("data/metagenome/taxa_count_matrix_per_timepoint_list.rds"))
t <- taxa.count.table %>%
group_by(SampleID, domain) %>%
summarise(domain.total.rpm = sum(rpm)) %>%
ungroup %>%
group_by(SampleID) %>%
mutate(domain.total.rpm.rel = domain.total.rpm*100/sum(domain.total.rpm)) %>%
ungroup %>%
group_by(domain) %>%
summarise(mean = mean(domain.total.rpm.rel),
sd = sd(domain.total.rpm.rel),
min = min(domain.total.rpm.rel),
max = max(domain.total.rpm.rel)) %>%
ungroup %>%
arrange(-mean) %>%
mutate(across(where(is.numeric), ~round(.x, digits = 3))) %>%
mutate(across(where(is.numeric), ~ifelse(.x > 0.001, paste0(.x, "%"), "<0.001%")))
t.name <- "domain_abundance_summary_table"
t.legend <- "Summary of domains relative representation per sample in whole metagenome sequencing reads."
writeLines(t.legend, here("figs/metagenome_analysis", paste0(t.name, "_legend.txt")))
write.csv(t, here("figs/metagenome_analysis", paste0(t.name, ".csv")), row.names = F)
saveRDS(t, here("data/metagenome_analysis", paste0(t.name, ".rds")))
t
Summary of domains relative representation per sample in whole metagenome sequencing reads.
Compare treatments groups across time points (i.e. before treatment, before IFV infection, after IFV infection).
n.permutations <- 10000
variable <- c("TreatmentLabel")
t.list <- lapply(variable, function(x){
t1 <- lapply(names(taxa.count.matrix.per.timepoint.list), function(y){
m <- taxa.count.matrix.per.timepoint.list[[y]] %>%
{.[row.names(.) %in% samples.metadata$SampleID, ]} %>%
{.[, colSums(.) > 0]}
data <- samples.metadata[row.names(m), ]
vegan::adonis(as.formula(paste0("m ~ ", x)),
data = data, permutations = n.permutations,
method = "bray")$aov.tab %>%
as.data.frame %>%
mutate(Term = row.names(.),
sign.sym. = symnum(`Pr(>F)`, corr = FALSE, na = FALSE,
cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1),
symbols = c("****", "***", "**", "*", "ns")),
TimePoint = y,
Comparison = paste(paste0( "'", unique(data[[x]]), "'"),
collapse = " vs ")) %>%
select(Term, TimePoint, Comparison, Df, SumsOfSqs, MeanSqs,
F.Model, R2, p = `Pr(>F)`, sign.sym.)
}) %>%
bind_rows
t2 <- lapply(names(taxa.count.matrix.per.timepoint.list), function(y){
m <- taxa.count.matrix.per.timepoint.list[[y]] %>%
{.[row.names(.) %in% samples.metadata$SampleID, ]}
data <- samples.metadata[row.names(m), ]
combn(as.character(unique(data[[x]])), m = 2, simplify = F) %>%
lapply(function(z){
d1 <- data %>%
filter(.[[x]] %in% z)
m1 <- m[as.character(d1$SampleID), ] %>%
{.[, colSums(.) > 0]}
vegan::adonis(as.formula(paste0("m1 ~ ", x)), data = d1,
permutations = n.permutations,
method = "bray")$aov.tab %>%
as.data.frame %>%
mutate(Term = row.names(.),
Comparison = paste(paste0( "'", z, "'"),
collapse = " vs "))
}) %>%
bind_rows %>%
mutate(p.adj = p.adjust(.$`Pr(>F)`, method="BH"),
sign.sym. = symnum(p.adj, corr = FALSE, na = FALSE,
cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
symbols = c("****", "***", "**", "*", "ns")),
TimePoint = y) %>%
select(Term, TimePoint, Comparison, Df, SumsOfSqs, MeanSqs,
F.Model, R2, p = `Pr(>F)`, p.adj, sign.sym.)
}) %>%
bind_rows
list(aov.tab = t1,
aov.tab.posthoc = t2)
}) %>%
`names<-`(variable)
t <- bind_rows(t.list$TreatmentLabel$aov.tab,
t.list$TreatmentLabel$aov.tab.posthoc) %>%
filter(!Term %in% c("Residuals", "Total")) %>%
select(TimePoint, Comparison, R2, p, p.adj, sign.sym.) %>%
mutate(across(where(is.numeric), ~ifelse(.x < 0.0001,
"<0.0001",
as.character(round(.x, digits = 4)))),
across(everything(), ~ifelse(is.na(.x), "NA", .x)))
t.name <- "adonis_out"
t.legend <- paste0("Comparison of bacterial community composition by permutational multivariate analysis of variance (perMANOVA) based on the Bray-Curtis dissimilarity with ", n.permutations, " permutations. P-values adjusted for multiple comparisons using the Benjamini and Hochberg method. '****': p < 0.0001/ p.adj < 0.001, '***': p < 0.001/p.adj < 0.01, '**': p < 0.01/p.adj < 0.05, '*': p < 0.05/p.adj < 0.1, 'ns': p > 0.05/p.adj > 0.1.")
writeLines(t.legend, here("figs/metagenome_analysis", paste0(t.name, "_legend.txt")))
saveRDS(t.list, here("data/metagenome_analysis", paste0(t.name, "_list.rds")))
saveRDS(t, here("data/metagenome_analysis", paste0(t.name, ".rds")))
lapply(names(t.list), function(x){
lapply(names(t.list[[x]]), function(y){
write.csv(y, here("figs/metagenome_analysis", paste0(t.name,
"_", x,
"_", y,
".csv")), row.names = F)
})
})
write.csv(t, here("figs/metagenome_analysis", paste0(t.name, ".csv")), row.names = F)
t
Comparison of bacterial community composition by permutational multivariate analysis of variance (perMANOVA) based on the Bray-Curtis dissimilarity with 10000 permutations. P-values adjusted for multiple comparisons using the Benjamini and Hochberg method. ‘****’: p < 0.0001/ p.adj < 0.001, ‘’: p < 0.001/p.adj < 0.01, ’’: p < 0.01/p.adj < 0.05, ’’: p < 0.05/p.adj < 0.1, ‘ns’: p > 0.05/p.adj > 0.1.
nmds.out <- lapply(names(taxa.count.matrix.per.timepoint.list), function(x){
taxa.count.matrix.per.timepoint.list[[x]] %>%
{.[row.names(.) %in% samples.metadata$SampleID, ]} %>%
{.[, colSums(.) > 0]} %>%
vegan::metaMDS(k = 2,
distance = "bray")
}) %>%
`names<-`(names(taxa.count.matrix.per.timepoint.list))
saveRDS(nmds.out, here("data/metagenome_analysis/nmds_out.rds"))
# Check if converged
lapply(nmds.out, function(x){x$converged})
## $`Day -4`
## [1] TRUE
##
## $`Day 0`
## [1] TRUE
##
## $`Day 3`
## [1] TRUE
p <- lapply(names(nmds.out), function(x){
nmds.out[[x]]$points %>%
as.data.frame %>%
mutate(SampleID = row.names(.))
}) %>%
bind_rows %>%
left_join(samples.metadata,
by = "SampleID") %>%
mutate(Treatment = TreatmentLabel) %>%
{
hull <- group_by(., TimePoint, Treatment) %>%
slice(chull(MDS1, MDS2))
ggplot(., aes(x = MDS1, y = MDS2)) +
theme +
facet_grid(~TimePoint) +
geom_hline(yintercept = 0) +
geom_vline(xintercept = 0) +
geom_polygon(data = hull,
aes(fill = Treatment),
alpha = 0.5,
lwd = 0.5,
color = "black") +
geom_point(aes(fill = Treatment),
pch = 21, size = 3, alpha = 0.8) +
scale_fill_manual(values = annotation.colors$TreatmentLabel) +
coord_cartesian(clip = "off")
}
p.name <- "nmds"
p.legend <- "Comparison of bacterial community composition by non-metric multidimensional scaling (NMDS) based on the Bray-Curtis dissimilarity."
writeLines(p.legend, here("figs/metagenome_analysis", paste0(p.name, "_legend.txt")))
saveRDS(p, here("figs/metagenome_analysis", paste0(p.name, ".rds")))
ggsave(here("figs/metagenome_analysis", paste0(p.name, ".pdf")),
p,
device = "pdf",
width = fig.layout$width.double,
height = 0.5*fig.layout$width.single,
units = "mm")
ggplotly(p)
Comparison of bacterial community composition by non-metric multidimensional scaling (NMDS) based on the Bray-Curtis dissimilarity.
diversity <- lapply(names(taxa.count.matrix.per.timepoint.list), function(x){
taxa.count.matrix.per.timepoint.list[[x]] %>%
{.[row.names(.) %in% samples.metadata$SampleID, ]} %>%
{.[, colSums(.) != 0]} %>%
{
data.frame(SampleID = row.names(.),
SDI = vegan::diversity(.),
Richness = rowSums(. != 0))
} %>%
pivot_longer(cols = c("SDI", "Richness"), names_to = "var", values_to = "value") %>%
mutate(var = factor(var, levels = c("SDI", "Richness")))
}) %>%
bind_rows
saveRDS(diversity, here("data/metagenome_analysis/diversity.rds"))
p <- diversity %>%
left_join(samples.metadata,
by = "SampleID") %>%
mutate(Treatment = TreatmentLabel) %>%
{
ggplot(., aes(x = Treatment, y = value,
group = Treatment,
CageID = CageID)) +
theme +
facet_grid(var~TimePoint, scales = "free_y") +
geom_boxplot(aes(fill = Treatment),
outlier.shape = NA) +
geom_jitter(aes(SampleID = SampleID,
AnimalID = AnimalID),
size = 0.5,
width = 0.1) +
stat_compare_means(label = "p.format",
label.x.npc = "left",
label.y.npc = "bottom") +
stat_compare_means(comparisons = combn(levels(.$Treatment), 2, simplify = F),
label = "p.signif",
method = "wilcox") +
# Hack some space for stats
geom_point(data = . %>%
group_by(var, Treatment, CageID) %>%
summarise(value = c(1.1*max(value), 0.95*min(value))),
x = NA) +
scale_fill_manual(values = annotation.colors$TreatmentLabel) +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
coord_cartesian(expand = T, clip = "off")
}
p.name <- "diversity"
p.legend <- "Comparison of bacterial species diversity in terms of Shannon diversity index (SDI) and richness. Statistical significance assessed by Kruskal-Wallis test and post-hoc Wilcoxon test. '****': p < 0.0001, '***': p < 0.001, '**': p < 0.01, '*': p < 0.05, 'ns': p > 0.05."
writeLines(p.legend, here("figs/metagenome_analysis", paste0(p.name, "_legend.txt")))
saveRDS(p, here("figs/metagenome_analysis", paste0(p.name, ".rds")))
ggsave(here("figs/metagenome_analysis", paste0(p.name, ".pdf")),
p,
device = "pdf",
width = fig.layout$width.single,
height = fig.layout$width.single,
units = "mm")
p
Comparison of bacterial species diversity in terms of Shannon diversity index (SDI) and richness. Statistical significance assessed by Kruskal-Wallis test and post-hoc Wilcoxon test. ‘****’: p < 0.0001, ‘’: p < 0.001, ’’: p < 0.01, ’’: p < 0.05, ‘ns’: p > 0.05.
sessionInfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices datasets utils methods base
##
## other attached packages:
## [1] ggpubr_0.2.1 magrittr_2.0.2 plotly_4.10.0 readxl_1.3.1
## [5] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.8 purrr_0.3.4
## [9] readr_2.1.2 tidyr_1.2.0 tibble_3.1.6 ggplot2_3.3.5
## [13] tidyverse_1.3.1 here_1.0.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.8.3 lubridate_1.8.0 assertthat_0.2.1 rprojroot_2.0.2
## [5] digest_0.6.29 utf8_1.2.2 R6_2.5.1 cellranger_1.1.0
## [9] backports_1.4.1 reprex_2.0.1 evaluate_0.15 highr_0.9
## [13] httr_1.4.2 pillar_1.7.0 rlang_1.0.2 lazyeval_0.2.2
## [17] rstudioapi_0.13 data.table_1.14.2 jquerylib_0.1.4 rmarkdown_2.13
## [21] labeling_0.4.2 htmlwidgets_1.5.4 munsell_0.5.0 broom_0.7.12
## [25] compiler_4.0.0 modelr_0.1.8 xfun_0.30 pkgconfig_2.0.3
## [29] htmltools_0.5.2 tidyselect_1.1.2 viridisLite_0.4.0 fansi_1.0.2
## [33] crayon_1.5.0 tzdb_0.2.0 dbplyr_2.1.1 withr_2.5.0
## [37] grid_4.0.0 jsonlite_1.8.0 gtable_0.3.0 lifecycle_1.0.1
## [41] DBI_1.1.2 scales_1.1.1 cli_3.2.0 stringi_1.7.6
## [45] farver_2.1.0 ggsignif_0.6.3 renv_0.13.1 fs_1.5.2
## [49] xml2_1.3.3 bslib_0.3.1 ellipsis_0.3.2 generics_0.1.2
## [53] vctrs_0.3.8 tools_4.0.0 glue_1.6.2 crosstalk_1.2.0
## [57] hms_1.1.1 fastmap_1.1.0 yaml_2.3.5 colorspace_2.0-3
## [61] rvest_1.0.2 knitr_1.37 haven_2.4.3 sass_0.4.0